Principal Causes of Death¶

In [3]:
secret_password = "secret"
In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

def summarize(dataframe):
    cols = dataframe.columns
    print("*** Dataframe info ***", end = "\n\n")
    dataframe.info()
    
    print()
    print("*** Column Unique Values ***", end = "\n\n")
    for i in range(len(cols)):
        print(f"{cols[i]} values: {dataframe[cols[i]].unique()}", end = "\n\n")

df = pd.read_csv("data/PrincipalCausesofDeath.csv")

summarize(df)
*** Dataframe info ***

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 172 entries, 0 to 171
Data columns (total 6 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   year               172 non-null    int64  
 1   rank               172 non-null    int64  
 2   icd                172 non-null    object 
 3   classification     172 non-null    object 
 4   disease_condition  172 non-null    object 
 5   percentage_deaths  172 non-null    float64
dtypes: float64(1), int64(2), object(3)
memory usage: 8.2+ KB

*** Column Unique Values ***

year values: [2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019
 2020 2021 2022]

rank values: [ 1  2  3  4  5  6  7  8  9 10]

icd values: ['ICD9' 'ICD10']

classification values: ['140-208' '410-414' '480-486' '430-438' 'E800-E999'
 '393-398, 402, 415-429' '250' '490-493, 496' '599' '580-589' 'C00-C97'
 'J12-J18' 'I20-I25' 'I60-I69' 'V01-Y89' 'I10-I15'
 'N00-N07, N17-N19, N25-N27' 'N39.0' 'J40-J44' 'I00-I09, I26-I51'
 'E10-E14' 'JE10-E14']

disease_condition values: ['Cancer' 'Ischaemic Heart Disease' 'Pneumonia'
 'Cerebrovascular Disease (including stroke)'
 'Accidents, Poisoning & Violence' 'Other Heart Diseases'
 'Diabetes Mellitus' 'Chronic Obstructive Lung Disease'
 'Urinary Tract Infection' 'Nephritis, Nephrotic Syndrome & Nephrosis'
 'Ischaemic heart diseases' 'Cerebrovascular diseases (including stroke)'
 'External causes of morbidity and mortality'
 'Hypertensive diseases (including hypertensive heart disease)'
 'Nephritis, nephrotic syndrome & nephrosis' 'Urinary tract infection'
 'Chronic obstructive lung disease' 'Other heart diseases']

percentage_deaths values: [28.5 18.5 13.7  8.9  6.3  4.3  3.3  2.   1.7 27.7 19.8 13.9  8.7  6.
  3.6  2.6  2.2 29.3 20.1  8.3  5.8  4.   2.7  2.5  2.1 19.2 15.3  8.
  5.7  4.4  2.4  2.3 18.7 15.7  8.4  5.5  4.8  1.  30.  16.4 16.   9.
  5.  30.1 16.8 16.1  9.3  5.6  2.8  1.9 30.5 15.5  4.9  3.1  1.6 29.4
 19.   4.7  1.8 29.7 19.4 16.7  6.8  4.5  3.9 29.6 19.3 17.   6.6 29.1
  3.4  1.5 28.8 20.6 18.1  3.   1.3 28.4 20.7 18.8  1.4 28.6 20.5  3.7
  2.9  1.2 26.4 18.4  6.1  1.1 23.9 20.  19.7]

In [5]:
fig, ax = plt.subplots(figsize=(20,12))
df = df[df["year"] >= 2016]
ax.set_ylim(0,45)
ax.grid()
sns.set(style='white')
sns.barplot(x="year",y="percentage_deaths",hue="disease_condition",data=df)

plt.xlabel("Year")
plt.ylabel("Percentage Deaths by Disease (%)")
plt.legend(title="Diseases", loc="upper right")
plt.show()
In [6]:
df_cancer = pd.read_csv("data/AgeStandardisedMortalityRateforCancer.csv")
df_ihd = pd.read_csv("data/AgeStandardisedMortalityRateforIschaemicHeartDisease.csv")
df_stroke = pd.read_csv("data/AgeStandardisedMortalityRateforStroke.csv")
summarize(df_cancer)
summarize(df_ihd)
summarize(df_stroke)
*** Dataframe info ***

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 30 entries, 0 to 29
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   year    30 non-null     int64  
 1   cancer  30 non-null     float64
dtypes: float64(1), int64(1)
memory usage: 612.0 bytes

*** Column Unique Values ***

year values: [1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003
 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017
 2018 2019]

cancer values: [244.5 241.  240.8 231.8 240.7 230.8 227.2 223.  218.5 207.3 199.6 204.2
 198.6 184.9 180.1 169.3 170.  165.2 157.6 153.2 157.5 154.9 155.1 143.9
 141.9 135.9 134.2 130.9 122.2]

*** Dataframe info ***

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 30 entries, 0 to 29
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   year    30 non-null     int64  
 1   ihd     30 non-null     float64
dtypes: float64(1), int64(1)
memory usage: 612.0 bytes

*** Column Unique Values ***

year values: [1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003
 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017
 2018 2019]

ihd values: [178.9 165.5 165.8 157.  158.5 147.2 147.8 145.7 142.1 136.1 122.3 125.6
 112.5 104.9  97.   94.   86.6  93.6  93.4  84.5  77.2  69.6  67.   61.6
  61.4  62.   61.   65.6  63.9  62.6]

*** Dataframe info ***

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 30 entries, 0 to 29
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   year    30 non-null     int64  
 1   stroke  30 non-null     float64
dtypes: float64(1), int64(1)
memory usage: 612.0 bytes

*** Column Unique Values ***

year values: [1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003
 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017
 2018 2019]

stroke values: [95.8 95.5 83.7 83.1 81.2 78.2 74.7 66.7 61.4 55.3 53.4 44.4 42.2 41.7
 40.8 37.3 32.9 30.8 30.2 26.3 28.1 28.2 28.9 25.4 23.5 19.9 19.6 18.4
 18.2 16.4]

In [7]:
df = pd.concat([df_cancer,df_ihd.drop(columns="year"),df_stroke.drop(columns="year")], axis=1)
df.head()

sns.lineplot(data=df,x="year",y="cancer",label="cancer")
sns.lineplot(data=df,x="year",y="ihd",label="ihd")
sns.lineplot(data=df,x="year",y="stroke",label="stroke")
plt.grid(linestyle="--")
plt.ylim(0,300)
plt.xlabel("Year")
plt.ylabel("Age Standardised Mortality Rate")
Out[7]:
Text(0, 0.5, 'Age Standardised Mortality Rate')

Healthcare professionals¶

In [8]:
df = pd.read_csv("data/NumberofGraduatesinHealthcareSpecialisationsbyCourse.csv")
df = df[df["no_of_graduates"] != "na"]
df = df.astype({'no_of_graduates': 'int64'})
summarize(df)
*** Dataframe info ***

<class 'pandas.core.frame.DataFrame'>
Index: 94 entries, 0 to 95
Data columns (total 3 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   year             94 non-null     int64 
 1   graduate_type    94 non-null     object
 2   no_of_graduates  94 non-null     int64 
dtypes: int64(2), object(1)
memory usage: 2.9+ KB

*** Column Unique Values ***

year values: [2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019
 2020 2021]

graduate_type values: ['Medical' 'Nursing' 'Allied Health Professionals' 'Dentistry' 'Pharmacy'
 'Oral Health Therapist']

no_of_graduates values: [ 229 1140  115   30   81  227 1446  188   36   86 1538  151   35   22
   87  219 1523  124   31   15   98  228 1619  141   43   21  118  279
 1665  163   42  107  290 1744  257  123  303 1641  206   48  138  312
 1564  222   45  302 1479   51   17  150  328 1558  210   57   23  165
  353 1536   56   24  198  423 1668  241   52    0  183  437 1818   89
  448 1958  456 2102  252   58]

In [9]:
df = df[df["year"] >= 2017]

fig, ax = plt.subplots(figsize=(15,9))

sns.set(style='white')
sns.barplot(x="year",y="no_of_graduates",hue="graduate_type",data=df)
ax.grid(linestyle="--")
ax.set_ylim(0,2500)
plt.xlabel("Year")
plt.ylabel("Number of graduates")
plt.legend(title="Healthcare Professionals", loc="upper left")
plt.show()

Polyclinic Attendance¶

In [10]:
df = pd.read_csv("data/Top4ConditionsofPolyclinicAttendances.csv")

summarize(df)
*** Dataframe info ***

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 64 entries, 0 to 63
Data columns (total 3 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   year                  64 non-null     int64  
 1   condition             64 non-null     object 
 2   percentage_diagnoses  64 non-null     float64
dtypes: float64(1), int64(1), object(1)
memory usage: 1.6+ KB

*** Column Unique Values ***

year values: [2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019
 2020 2021]

condition values: ['Hyperlipidemia' 'Hypertensive Disease'
 'Acute Upper Respiratory Tract Infection including Influenza'
 'Diabetes Mellitus']

percentage_diagnoses values: [13.8 17.2 11.9  8.6 14.9 17.3 11.6  9.  15.5 10.8  9.1 15.8 16.6 11.1
 15.6 15.9 11.2  8.9 15.7 10.6  8.8 16.  10.5 16.2  9.6  9.2 15.   9.3
 14.2 14.4 13.7 13.2  9.4 13.3 15.1  4.7 15.2 14.5 10.3  3.6]

In [11]:
condition_dict = { x:[] for x in list(df["condition"].unique()) }

for i in range(0, len(df["percentage_diagnoses"])):
    row = df.iloc[i,]
    condition_dict[row["condition"]].append(row["percentage_diagnoses"])

width = 0.5

fig, ax = plt.subplots(figsize=(12,6))
bottom = np.zeros(16, dtype=float)

for disease, values in condition_dict.items():
    p = ax.bar(list(df["year"].unique()), values, width, label=disease, bottom=bottom)
    bottom += values

ax.grid(linestyle="--")
ax.set_ylim(0,70)
ax.set_xlabel("Year")
ax.set_ylabel("Percentage diagnosis")
ax.legend(loc="upper right")

plt.show()
In [12]:
df_2021 = df[df["year"] == 2021]
explode = (0.05, 0.05, 0.05, 0.05)
color = ["blue","orange","red","green"]
plt.pie(df_2021["percentage_diagnoses"], labels = df_2021["condition"], autopct='%.2f%%', explode = explode, colors = color, startangle=150)
plt.title("Polyclinic attendance for 4 major illnesses in 2021")
plt.show()

Hospital admissions¶

In [13]:
df = pd.read_csv("data/HospitalAdmissionsBySectorAnnual.csv")
summarize(df)
df = df[(df["year"] >= 2006) & (df["level_2"] == "Public") & (df["level_1"] == "Acute Hospitals Admissions")]
df = df.astype({'value': 'int64'})
df.head()
*** Dataframe info ***

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 222 entries, 0 to 221
Data columns (total 4 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   year     222 non-null    int64 
 1   level_1  222 non-null    object
 2   level_2  222 non-null    object
 3   value    222 non-null    object
dtypes: int64(1), object(3)
memory usage: 7.1+ KB

*** Column Unique Values ***

year values: [1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997
 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011
 2012 2013 2014 2015 2016 2017 2018 2019 2020]

level_1 values: ['Acute Hospitals Admissions' 'Psychiatric Hospitals Admissions'
 'Community Hospitals Admissions']

level_2 values: ['Public' 'Non-public']

value values: ['na' '308016' '92620' '8245' '269' '0' '6229' '317355' '95850' '8417'
 '288' '7312' '321187' '91013' '8884' '7013' '323535' '90057' '9060'
 '7612' '333573' '93732' '9759' '7534' '347398' '98986' '9628' '572'
 '7107' '354076' '105826' '8382' '688' '7063' '364911' '109282' '8110'
 '1053' '7649' '372661' '116913' '9050' '1467' '7600' '379973' '119916'
 '8986' '2187' '8483' '416663' '129593' '9028' '5432' '8449' '436882'
 '131235' '9316' '7361' '8994' '455272' '129547' '9215' '8271' '483939'
 '134197' '9234' '10215' '9828' '444863' '111648' '8426' '11169' '9679']

Out[13]:
year level_1 level_2 value
132 2006 Acute Hospitals Admissions Public 308016
138 2007 Acute Hospitals Admissions Public 317355
144 2008 Acute Hospitals Admissions Public 321187
150 2009 Acute Hospitals Admissions Public 323535
156 2010 Acute Hospitals Admissions Public 333573
In [14]:
fig, ax = plt.subplots(figsize = (10,6))

plt.ylim(275000,550000)
plt.grid()
plt.xlabel("Year")
plt.ylabel("Admissions")
plt.title("Public Acute Hospitals Admissions")
sns.set(style='white')
sns.histplot(
    df, x="year", y="value",
    bins=30, discrete=(True, False), log_scale=(False, True)
)
Out[14]:
<Axes: title={'center': 'Public Acute Hospitals Admissions'}, xlabel='Year', ylabel='Admissions'>

Government expenditure¶

In [15]:
df = pd.read_csv("data/GovernmentTotalExpenditure.csv")
summarize(df)
df = df[df["ministry"] == "Health"]
df.head()

def convert_from_m_to_b(dataframe):
    dataframe["amount"] = dataframe["amount"] / 1000

convert_from_m_to_b(df)

fig, ax = plt.subplots(figsize=(8,5))
plt.title("Singapore's Healthcare Expenditure by Year\n")
plt.xlabel("Financial Year")
plt.ylabel("Amount ($ billions)")
plt.grid(linestyle="--")
sns.lineplot(df, x="financial_year",y="amount",hue="type")
plt.legend(title="Expenditure type")
plt.show()
*** Dataframe info ***

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1120 entries, 0 to 1119
Data columns (total 7 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   financial_year            1120 non-null   int64  
 1   actual_revised_estimated  1120 non-null   object 
 2   sector                    1120 non-null   object 
 3   ministry                  1120 non-null   object 
 4   type                      1120 non-null   object 
 5   amount                    1120 non-null   int64  
 6   percent_of_gdp            1120 non-null   float64
dtypes: float64(1), int64(2), object(4)
memory usage: 61.4+ KB

*** Column Unique Values ***

financial_year values: [1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010
 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024]

actual_revised_estimated values: ['Actual' 'Revised' 'Estimated']

sector values: ['Economic Development' 'Government Administration'
 'Security and External Relations' 'Social Development']

ministry values: ['Info-Communications and Media Development' 'Manpower'
 'Trade and Industry' 'Transport' 'Communications and Information'
 'Finance' 'Law' 'Organs of State' "Prime Minister's Office" 'Defence'
 'Foreign Affairs' 'Home Affairs' 'Culture, Community and Youth'
 'Education' 'Health' 'National Development'
 'Social and Family Development' 'Sustainability and the Environment']

type values: ['Development' 'Operating']

amount values: [   39    94    82   179    23     5    25    33    46    28    48    38
   525   278   126    68   296   153   105     4     8    29    53    17
    37     0    24    47    42    34    76    88   160   227   267   378
   454   490   617   535   731   765   855  1104   836    12     6    18
    54    52    36    15    35    22    62    21    65    79    85   127
   132   136    91   107   130   156   158   161   166   188   225   224
   414   439   380   456   496   520   611   634   645   697   892  4166
  4187  3767  1551  1169  2175  2646  2073  1820  1293  1009  1421  1154
  1055  1207  1516  2183  2334  2673  2553  2387  1947  2102  2398  2858
  2688  3656  3176  2910  3398  3950  4465  4343   341   315   366   412
   553   564   497   392   436   473   528   660   704   684   671   685
   710   729   783   921   943   993  1116  2509  1986  2757  1603  1629
  1346  1737   720  1130  1508  1203  1115  1776  1617  1518  1621  3379
  4082  4250  4003  5414  5504  5489 10332  9319  7572  9877  6617  5327
  6534  9157 10413 11792   314   442   409  2390   406   385   292   289
   277   285   321   367   371   464   502   555   598   953  1607  1900
  1945  2945  3738  3692  2550  2447    90   637   240   338   199    57
    70   100   214   308   350    10    44    30    72    74    56   162
   298   310   339   388   352   379   333   318   345   428   438   484
   552   631   711   682   740   824   833   828   852   904   951  1002
  1173  1187   435   402   131   213   325   184   329   218   167   123
   148   316   273   434   319   377   320   217   311   197   387    89
    97   109   120   112   103    96   106   118   119   163   177   174
   181   185   196   207   212   263   279    98   251   151    31    11
    14    55    59    61   150   135   170   173   186   182   187   195
   247   235   249   313   326   384   410   440   508   578   659   693
   750    32    41    40     9    20   159   172   250   121   108   134
   143   141   176   194   203   259   290   381   374   653   670   746
   692  1004  1036  1144   619   418   775   620   861   730   509   524
   363   355   349   328   479   468   422   429   431   543   594   482
   481   556   609   871   905  6097  6481  6647  6561  7089  7694  7714
  8243  8889  9273  9660 10397 10603 10623 10797 11056 11329 11867 12672
 13281 13582 13825 13669 13027 14737 16343 18885 19344    81    49    86
    83    71    45    19    26    16   180   211   233   257   344   343
   360   382   394   447   444   398   466   494   900   847   654   570
   478   460   399   396   506   573   831  1129   972   800  1332  1050
  1020   793   994  1032  1101  1300  1580  1563  1708  1752  1825  2010
  2428  2478  2577  2937  2947  3266  3522  4029  4377  4652  4996  5505
  5703  6013  6678  6942  7044  7478   137   139   155   110    95   128
   144    27   200   220   234   241   228   294   500   430   336   354
   424   517   721   738   779   507   489   300   210  1571   364  1003
  1335  1795  1444  1586  1668  1674  1927  2096  2272  1951  2099  1102
  1686  1601  1591  1473  1774  1218  1239   867   608   742   753   877
  1042   860   973   886   699   657   791   493   305   332  3348  3167
  3257  4277  4767  4824  4997  4975  5215  6352  6786  7477  7838  8999
  9698  9637 10665 10712 11236 11812 12080 12429 11932 11767 12605 12890
 13748 14132   274   140   145   114   485   453   605   723  1147  1413
  1619  1465  1490  1404   949  1049  1204  1272  1375   896   992   936
  1072  1445  1451  1904  1604  1680  1840  2019  2379  2920  3258  3489
  4066  5044  5872  7520  8199  8734  8937  9915 14311 16274 15909 16674
 17397     1     2     3   340   469   666   709   706  1028  1027  1029
  2291  1562  1797  1888  2003  2172  2069  1770  1089  1865  1153  1010
   675  1095  1307  1646  1572  1354   901  1308  1321  1248  1257  1191
   926  1110  1221  1135   441   358   805   959   790  1428   673   858
  2284  3218  2941  2606  4626  4710  7851  7475   169   138    92   115
    64   111    87    93    84   104   124   513   572   814   844   903
   962  1182  1565  1829  1721  1739  1598  1722  2121  2389  2459  2608
  2835  3673  3592  3653  4057  4561   652   945   771   952   323   427
   567  1549   839  1076   708   832   390   448   408   606   656   726
   813   958  1125  1201  1295  1540  1832  1848  2420  2834]

percent_of_gdp values: [0.    0.001 0.002 0.008 0.007 0.006 0.015 0.019 0.014 0.011 0.005 0.003
 0.004 0.009 0.012 0.013 0.024 0.021 0.016 0.041 0.046 0.044 0.039 0.042
 0.038 0.035 0.036 0.032 0.03  0.029 0.028 0.027 0.026 0.01  0.022 0.025
 0.023 0.02  0.018 0.017]

In [16]:
import plotly.express as px
df = pd.read_csv("data/GovernmentTotalExpenditure.csv")
df = df[df["type"] == "Operating"]
fig = px.scatter(df, x="percent_of_gdp", y="amount", animation_frame="financial_year", animation_group="ministry",
           size="amount", color="ministry", hover_name="ministry",
          log_y=True, size_max=120, range_x=[-0.01,0.06], range_y=[10,1000000])
fig.show()
−0.0100.010.020.030.040.050.0610251002510002510k25100k251M
ministryInfo-Communications and Media DevelopmentManpowerTrade and IndustryTransportCommunications and InformationFinanceLawOrgans of StatePrime Minister's OfficeDefenceForeign AffairsHome AffairsCulture, Community and YouthEducationHealthNational DevelopmentSocial and Family DevelopmentSustainability and the Environmentfinancial_year=19971997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024percent_of_gdpamount▶◼
plotly-logomark

Medisave account¶

In [17]:
df = pd.read_csv("data/MedisaveAccountsandBalancesAnnual.csv")
df.head()
summarize(df)
*** Dataframe info ***

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 16 entries, 0 to 15
Data columns (total 5 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   year              16 non-null     int64  
 1   no_of_accounts    16 non-null     int64  
 2   total_balance     16 non-null     float64
 3   average_balance   16 non-null     int64  
 4   amount_withdrawn  16 non-null     int64  
dtypes: float64(1), int64(4)
memory usage: 772.0 bytes

*** Column Unique Values ***

year values: [2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019
 2020 2021]

no_of_accounts values: [2700000 2800000 2900000 3000000 3100000 3200000 3300000 3400000 3500000
 3600000 3700000 3800000]

total_balance values: [ 36.9  39.3  42.4  45.8  50.2  54.8  60.   64.9  70.5  75.9  82.1  88.6
  96.1 102.  110.1 117.6]

average_balance values: [13600 14100 14900 15700 16900 18200 19400 20600 21800 22700 24200 25600
 26700 27900 29600 31000]

amount_withdrawn values: [ 445  517  558  601  678  722  767  798  853  900  931  964 1027 1089
 1007 1115]

In [18]:
sns.lmplot(df,x="average_balance", y="amount_withdrawn", hue="year")
plt.title("Relationship between Medisave balance and withdrawal\n")
plt.xlabel("Average Medisave balance $")
plt.ylabel("Amount withdrawn $")
C:\Users\jinji\anaconda3\Lib\site-packages\seaborn\axisgrid.py:118: UserWarning:

The figure layout has changed to tight

Out[18]:
Text(48.59481250000002, 0.5, 'Amount withdrawn $')
In [19]:
sns.lmplot(df,x="average_balance", y="amount_withdrawn")
plt.title("Relationship between Medisave balance and withdrawal\n")
plt.xlabel("Average Medisave balance $")
plt.ylabel("Amount withdrawn $")
C:\Users\jinji\anaconda3\Lib\site-packages\seaborn\axisgrid.py:118: UserWarning:

The figure layout has changed to tight

Out[19]:
Text(20.53125, 0.5, 'Amount withdrawn $')

Insurance coverage¶

In [20]:
df = pd.read_csv("data/NumberofPolicyholdersforMediShieldandIntegratedShieldPlansAnnual.csv")
df.tail()
summarize(df)
*** Dataframe info ***

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 16 entries, 0 to 15
Data columns (total 3 columns):
 #   Column                            Non-Null Count  Dtype 
---  ------                            --------------  ----- 
 0   year                              16 non-null     int64 
 1   policyholders                     16 non-null     object
 2   policyholders_with_private_plans  16 non-null     int64 
dtypes: int64(2), object(1)
memory usage: 516.0+ bytes

*** Column Unique Values ***

year values: [2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019
 2020 2021]

policyholders values: ['2760000' '2870000' '3076000' '3299000' '3390000' '3500000' '3541000'
 '3593000' '3656000' '-']

policyholders_with_private_plans values: [1510000 1660000 1790000 1930000 2068000 2200000 2318000 2404000 2485000
 2548000 2619000 2686000 2749000 2801000 2830000 2864000]

In [21]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import mysql.connector

connect = mysql.connector.connect(
    user = "root",
    password = secret_password,
    host = "127.0.0.1",
    database = "spdatabase"
)

query = "SELECT * FROM spdatabase.policyholders"

df = pd.read_sql(query, con = connect)

plt.plot(df["year"],df["policyholders_with_private_plans"])
plt.xlabel("Year")
plt.ylabel("Number of policy holders with private plans (millions)")
plt.grid(linestyle="--")
plt.show()

connect.close()
C:\Users\jinji\AppData\Local\Temp\ipykernel_23720\1233250901.py:15: UserWarning:

pandas only supports SQLAlchemy connectable (engine/connection) or database string URI or sqlite3 DBAPI2 connection. Other DBAPI2 objects are not tested. Please consider using SQLAlchemy.